
######## Analysis of P0-2 promoter mutants (from EE25) reconstituted in the ancestor strain
#media: for growth check: E, C, B medium
# [ B=0.1%, C=0.01%, E=0%] supplemented with 0.1% CAS
## all strains are IT049 = IS- strain background; one mutant in MS022 (IS+) background

#Figure 6 D and E plots are indicated below.

###################################################

# Q: are the promoter mutations adaptive? 
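###### calculates STEADY STATE FLUORESCENCE and growth rates with a sliding window
###### (note: the growth-rate code below keeps the window with the steepest slope; r^2 is not evaluated)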


######read in data
# All file names below (and the plate-info file read later) are relative to
# this directory.
setwd("/Users/itomanek/Documents/promoter_evolution/experiments/platereader_data/2020_September")

# The three exports share one layout: tab-separated with a header row.
# read.csv2 is used so that "," is parsed as the decimal mark.
read_plate_table <- function(path) {
  read.csv2(path, header = TRUE, sep = "\t")
}

OD <- read_plate_table("p02_reconst_OD600.txt")
YFP <- read_plate_table("p02_reconst_YFP.txt")
CFP <- read_plate_table("p02_reconst_CFP.txt")


##############################################################################
###Plotting of single growth curves with strain information 

##########rearrange the data so that each row contains the value for one 
#read of one well
library(reshape2)

# Long format: one row per (time point, well). "Time" is the identity
# variable; every other column is treated as a well and melted into
# Well + the named measurement column.
reshaped_A <- melt(OD, id = c("Time"), variable.name = "Well", value.name = "OD600")
yfp_reshaped_A <- melt(YFP, id = c("Time"), variable.name = "Well", value.name = "YFP")
cfp_reshaped_A <- melt(CFP, id = c("Time"), variable.name = "Well", value.name = "CFP")

# Convert plate-reader time stamps ("hh:mm:ss") to decimal HOURS, rounded to
# two decimals. Only the hour and minute fields are used; seconds are ignored.
# vapply() guarantees a plain numeric vector (sapply() would return a list
# for empty input).
hms_to_hours <- function(stamps) {
  fields <- strsplit(as.character(stamps), ":")
  hours <- vapply(
    fields,
    function(p) {
      p <- as.numeric(p)
      p[1] + p[2] / 60
    },
    numeric(1)
  )
  round(hours, 2)
}

time <- hms_to_hours(reshaped_A[, 1])
time # decimal hours

# Replace the original time format with decimal hours in all three tables.
# The OD600 time column is deliberately reused for YFP and CFP so that the
# later joins on c("Time", "Well") match exactly; this assumes the three
# tables have the same row order (same wells, same reads) -- TODO confirm.
reshaped_A$Time <- time
yfp_reshaped_A$Time <- time
cfp_reshaped_A$Time <- time


###### PLATE INFO (TEMPLATE)  ########

#read in the plate template - i.e. metadata, additional info
# Plate layout / metadata, one row per well. Columns used further down:
# Well, strain, mutation, medium.
plate_info <- read.csv2("plate_info_p02_reconst.txt", header = TRUE, sep = "\t")

# quick look at the layout
head(plate_info)
subset(plate_info, medium == "C") # 4 replicates each

# install.packages("dplyr")  ## info: https://cran.rstudio.com/web/packages/dplyr/vignettes/introduction.html
library("dplyr")

# Attach the metadata to the OD600 reads, then add the YFP and CFP reads of
# the same well and time point. inner_join keeps only wells that are listed
# in plate_info.
annotated_A <- reshaped_A %>%
  inner_join(plate_info, by = "Well", copy = TRUE) %>%
  inner_join(yfp_reshaped_A, by = c("Time", "Well")) %>%
  inner_join(cfp_reshaped_A, by = c("Time", "Well"))
head(annotated_A)

###### Group data ###############################################
grouped_A <- annotated_A %>%
  group_by(Time, Well, strain, mutation, medium)
# Do not call na.omit() on grouped_A before find_GR(): find_GR() takes its
# list of wells from grouped_A, so dropping rows can change the set of wells.
################## PLOTS ##########################
#install.packages("ggplot2")
library(ggplot2)
#install.packages("Hmisc")
library("Hmisc")
greys <- c("black", "#4D4D4D", "#888888", "#AEAEAE", "#CCCCCC")

###############  ############### ###############

# Shared look of all exploratory plots: white background, no grid, larger text.
plain_theme <- theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    text = element_text(size = 15)
  )

# Mapping shared by the OD600 growth-curve plots: one line per well,
# colour = mutation, line type = medium.
od_by_mutation <- aes(
  x = Time, y = OD600, group = Well,
  color = as.factor(mutation), linetype = as.factor(medium)
)

# growth, all wells, coloured by medium
ggplot() +
  geom_line(
    data = grouped_A,
    aes(x = Time, y = OD600, group = Well, color = as.factor(medium), linetype = as.factor(medium))
  ) +
  plain_theme

# growth in E
ggplot() +
  geom_line(data = subset(grouped_A, medium == "E"), mapping = od_by_mutation) +
  plain_theme
# 2 ancestors are outliers (but just after exponential phase, looks ok)

# growth in C, with the ancestor in E overlaid for comparison
ggplot() +
  geom_line(data = subset(grouped_A, medium == "C"), mapping = od_by_mutation) +
  geom_line(data = subset(grouped_A, medium == "E" & mutation == "anc"), mapping = od_by_mutation) +
  plain_theme
# nice

# YFP vs. OD in E (OD600 below 0.3, without strain MS022)
# NOTE: linetype has no effect on points; kept as in the original mapping.
ggplot() +
  geom_point(
    data = subset(na.omit(grouped_A), medium == "E" & OD600 < 0.3 & strain != "MS022"),
    aes(x = OD600, y = YFP, group = Well, color = as.factor(mutation), linetype = as.factor(medium))
  ) +
  plain_theme
#

# growth in B
# (optional overlay, currently disabled: ancestor in E, i.e.
#  subset(grouped_A, medium == "E" & mutation == "anc"))
ggplot() +
  geom_line(data = subset(grouped_A, medium == "B"), mapping = od_by_mutation) +
  plain_theme
# less obvious, but GR should differ

# growth in B - individual strains (ancestor vs. F2)
ggplot() +
  geom_line(data = subset(grouped_A, medium == "B" & mutation == "anc"), mapping = od_by_mutation) +
  geom_line(data = subset(grouped_A, medium == "B" & mutation == "F2"), mapping = od_by_mutation) +
  plain_theme
# more obvious,  GR  differs

# growth comparison across media (C and E; the B layer is currently disabled)
ggplot() +
  geom_line(data = subset(grouped_A, medium == "C"), mapping = od_by_mutation) +
  geom_line(data = subset(grouped_A, medium == "E"), mapping = od_by_mutation) +
  plain_theme
# less obvious, but GR should differ

# CFP over time, all wells, coloured by medium
ggplot() +
  geom_line(
    data = grouped_A,
    aes(x = Time, y = CFP, group = Well, color = as.factor(medium), linetype = as.factor(medium))
  ) +
  plain_theme

# YFP over time in E (rows with missing values dropped)
ggplot() +
  geom_line(
    data = na.omit(subset(grouped_A, medium == "E")),
    aes(x = Time, y = YFP, group = Well, color = as.factor(mutation), linetype = as.factor(medium))
  ) +
  plain_theme

##Figure 6 D plot
library("viridis")

# Mean and SD of OD-normalised YFP (YFP / OD600) per time point, medium,
# strain and mutation.
# plyr is addressed by namespace: it is only attached further down the script,
# so a bare ddply() is not found in a clean session, and a bare summarise
# would resolve to dplyr's or plyr's version depending on what is attached.
# The grouping is dropped first so each piece is summarised as a plain data
# frame.
sum_YFP <- plyr::ddply(
  as.data.frame(grouped_A),
  c("Time", "medium", "strain", "mutation"),
  plyr::summarise,
  mean_YFP = mean(YFP / OD600, na.rm = TRUE),
  sdy = sd(YFP / OD600, na.rm = TRUE)
)
head(sum_YFP)

# Rows shown in the figure: medium E, strain "22" excluded, complete rows only.
# NOTE(review): other plots in this script exclude the strain as "MS022";
# confirm which label plate_info actually uses.
fig6d_data <- na.omit(subset(sum_YFP, medium == "E" & strain != "22"))

#YFPOD600 sum plot
ggplot() + scale_color_viridis(discrete = TRUE, option = "D") +
  geom_point(
    data = fig6d_data,
    aes(x = Time, y = mean_YFP, color = as.factor(mutation))
  ) + # linetype=as.factor(strain)
  geom_errorbar(
    data = fig6d_data,
    aes(
      x = Time, y = mean_YFP, ymin = mean_YFP - sdy, ymax = mean_YFP + sdy,
      color = as.factor(mutation)
    ),
    alpha = 0.4
  ) +
  theme_bw() + theme(legend.position = "none") + # no grey background
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), text = element_text(size = 15))


##### #Growth Rates ######## ## ############# ## #############
# # sliding window of linear fits to log(OD600) vs. time
## keeps the window with the steepest slope (max. growth rate) and saves its intercept+slope
## (note: r^2 of the fits is not evaluated by the code below)
## ######################## ## ############# ## #############

#extract+transform time from raw data
# Time axis of the raw (wide) OD table in decimal hours, one value per read.
# Assumes the first column of OD is the plate-reader time stamp ("hh:mm:ss")
# -- TODO confirm against the export format.
# The guard makes this block safe to re-run: once OD$Time has been replaced by
# numbers, splitting on ":" again would turn every time point into NA.
if (is.numeric(OD[, 1])) {
  time <- OD[, 1]
} else {
  time <- vapply(
    strsplit(as.character(OD[, 1]), ":"),
    function(p) {
      p <- as.numeric(p)
      p[1] + p[2] / 60 # hours + minutes/60; seconds are ignored
    },
    numeric(1)
  )
  time <- round(time, 2)
}
time # in hours (minutes as decimal fraction)
OD$Time <- time

#### FUNCTION TO CALCULATE GR
library("zoo")
time # find_GR() below fits rows 15:110 of this vector
#' Maximum growth rate per well from sliding-window log-linear fits.
#'
#' For every well, log(OD) is regressed on time in each window of `width`
#' consecutive reads (window moved one read at a time). The window with the
#' steepest slope is kept and its intercept and slope are returned.
#' Note: the window is chosen by maximum slope, not by r^2.
#'
#' Wells are looked up in `OD` by column NAME. (Matching by column position
#' mislabels growth rates as soon as the well list and the column order of
#' `OD` differ, e.g. when a well is missing from the plate info.)
#'
#' @param OD Wide data frame with one numeric column per well.
#' @param wells Character vector of well (column) names to fit. Defaults to
#'   the wells present in the global `grouped_A`.
#' @param time_h Numeric vector of time points, one per row of `OD`.
#'   Defaults to the global `time` vector.
#' @param rows Row indices of `OD` / `time_h` to use for fitting; indices
#'   beyond the available data are dropped.
#' @param width Number of consecutive reads per fitting window.
#' @return Data frame with one row per well: `Well` (character), `GR_i`
#'   (intercept of the best window) and `GR_s` (slope of the best window).
#'   Both are NA when no window of a well has at least two usable points.
find_GR <- function(OD,
                    wells = as.character(unique(grouped_A$Well)),
                    time_h = time,
                    rows = 15:110,
                    width = 20) {
  if (!is.numeric(time_h)) {
    stop("find_GR: `time_h` must be a numeric vector of time points", call. = FALSE)
  }
  unknown <- setdiff(wells, names(OD))
  if (length(unknown) > 0) {
    stop("find_GR: wells not found as columns of OD: ",
      paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }

  # Use only rows that exist in both the OD table and the time vector.
  rows <- rows[rows <= min(nrow(OD), length(time_h))]
  n_windows <- length(rows) - width + 1
  if (n_windows < 1) {
    stop("find_GR: fewer than `width` rows available for fitting", call. = FALSE)
  }
  t_sub <- time_h[rows]

  # Intercept and slope of the steepest window for one well.
  best_window <- function(od_values) {
    # log() is only defined for positive OD; everything else becomes NA so
    # that lm() is never handed NaN or -Inf.
    log_od <- rep(NA_real_, length(od_values))
    positive <- !is.na(od_values) & od_values > 0
    log_od[positive] <- log(od_values[positive])

    coefs <- vapply(
      seq_len(n_windows),
      function(first) {
        idx <- first:(first + width - 1)
        x <- t_sub[idx]
        y <- log_od[idx]
        if (sum(is.finite(x) & is.finite(y)) < 2) {
          return(c(NA_real_, NA_real_))
        }
        unname(coef(lm(y ~ x))) # c(intercept, slope); lm drops NA rows
      },
      numeric(2)
    )

    steepest <- which.max(coefs[2, ]) # ignores NA slopes
    if (length(steepest) == 0) {
      return(c(NA_real_, NA_real_))
    }
    coefs[, steepest]
  }

  fits <- vapply(
    wells,
    function(w) best_window(OD[[w]][rows]),
    numeric(2),
    USE.NAMES = FALSE
  )

  data.frame(
    Well = wells,
    GR_i = fits[1, ],
    GR_s = fits[2, ],
    stringsAsFactors = FALSE
  )
} #function end

#run find_GR function for each 96-well plate (=medium)
# Fit growth rates for all wells of the plate.
max_lm_df <- find_GR(OD)

# Rebuild grouped_A from annotated_A (so this block can be re-run while
# debugging) and attach intercept/slope of each well to all of its rows.
# Do NOT na.omit() grouped_A here: it would then hold fewer wells than OD.
grouped_A <- annotated_A %>%
  group_by(Time, Well, strain, mutation, medium) %>%
  inner_join(max_lm_df, by = c("Well")) # join by Well

## check GR
# Visual check of a single well: log(OD) over time with the fitted line
# (intercept GR_i, slope GR_s) drawn on top.
show_fit <- function(well_id) {
  plot(
    OD$Time, log(OD[[well_id]]),
    xlab = "OD$Time", ylab = paste0("log(OD$", well_id, ")")
  )
  well_rows <- subset(grouped_A, Well == well_id)
  abline(well_rows$GR_i, well_rows$GR_s)
}

show_fit("C12") # C1-C12: "H12" mutation; fit looks right
# plate_info
show_fit("B12") # B1-B12: "anc"; fit looks right
show_fit("B2") # B1-B12: "anc"; fit looks right

# GR and curves: growth curves coloured by the fitted slope of their well
gr_curves <- function(d) {
  ggplot(d) +
    geom_line(aes(x = Time, y = OD600, color = GR_s, group = Well))
}

gr_curves(subset(grouped_A, (OD600) >= 0.09)) # looks ok
gr_curves(subset(grouped_A, mutation == "anc")) # ok
gr_curves(subset(grouped_A, mutation == "A11")) # ok
gr_curves(subset(grouped_A, mutation == "F2")) # ok
gr_curves(subset(grouped_A, mutation == "H12")) # ok
unique(grouped_A$mutation)



####### Statistics ##############################################
library("plyr")

###for ERROR BAR CALCULATION ##########
# Growth rate (GR_s) is ONE value per well, but grouped_A repeats it on every
# time-point row of that well. Summarising those rows directly counts each
# well once per time point: the SD is computed with a far too large n and the
# SEM is divided by sqrt(wells * time points). Reduce to one row per well
# first so that n is the number of replicate wells.
per_well_GR <- unique(
  as.data.frame(grouped_A)[, c("Well", "medium", "strain", "mutation", "GR_s")]
)

# Mean, SD and SEM of the growth rate across replicate wells.
# sdc/semc are NA for groups with a single well.
# plyr functions are called by namespace so the result does not depend on
# whether dplyr's or plyr's summarise is first on the search path.
sum_GR <- plyr::ddply(
  per_well_GR,
  c("medium", "strain", "mutation"),
  plyr::summarise,
  mean_GR = mean(GR_s, na.rm = TRUE),
  sdc = sd(GR_s, na.rm = TRUE),
  semc = sd(GR_s, na.rm = TRUE) / sqrt(sum(!is.na(GR_s)))
)

# Mean and SD of OD600, YFP and CFP across replicate wells at each time point.
sum_data <- plyr::ddply(
  as.data.frame(grouped_A),
  c("Time", "medium", "strain", "mutation"),
  plyr::summarise,
  mean_OD600 = mean(OD600),
  mean_YFP = mean(YFP),
  mean_CFP = mean(CFP),
  sdo = sd(OD600),
  sdy = sd(YFP),
  sdc = sd(CFP)
)

############# more PLOTS ###################
# Shared plot look: white background, no grid lines, larger text.
nicep <- theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    text = element_text(size = 15)
  )

# GR - medium - mutant: fitted slope of every row, strain "22" excluded
ggplot() +
  geom_point(
    data = subset(grouped_A, strain != "22"),
    aes(x = (medium), y = GR_s, color = as.factor(mutation))
  ) +
  nicep

# GR medium - mutant stats: mean +/- SD per medium and mutation
gr_stats <- na.omit(subset(sum_GR, strain != "22"))
ggplot() +
  geom_point(
    data = gr_stats,
    aes(x = (medium), y = mean_GR, color = as.factor(mutation))
  ) +
  geom_errorbar(
    data = gr_stats,
    aes(
      x = (medium), y = mean_GR, ymin = mean_GR - sdc, ymax = mean_GR + sdc,
      color = as.factor(mutation)
    )
  ) +
  nicep

#Figure 6 E plot
# GR - medium - mutant: dodged bars of the mean growth rate with +/- SD bars
ggplot(
  data = subset(sum_GR, strain != "22"),
  aes(x = medium, y = mean_GR, ymin = mean_GR - sdc, ymax = mean_GR + sdc, fill = mutation)
) +
  scale_fill_viridis(discrete = TRUE, option = "D") +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_errorbar(position = position_dodge(), col = "black") +
  ggtitle("mean growth rate") +
  nicep

